

#delimit ;
/* ------------------------------------------------------------------
   Session setup and data assembly.
   Loads the base analysis file, opens the text log "models", and
   merges three team-year files (top100_v2, elo_clubs, coach_castrol)
   onto it, keyed on team_id and year.
   ------------------------------------------------------------------ */
clear all;
set more off;

cd "C:\Users\ejm5\Dropbox\Soccer";
use "C:\Users\ejm5\Dropbox\Soccer\data_for_do\data_analysis.dta";
log using "models", text replace;

/* The base file carries a _merge variable, which has to go before
   the next merge can run. */
drop _merge;

/* Merge 1: top100_v2. No merge indicator is kept for this step. */
xtset team_id year;
merge 1:1 team_id year
    using "C:\Users\ejm5\Dropbox\Soccer\Data\top100_v2.dta",
    nogenerate;

/* Merge 2: elo_clubs. The merge indicator is kept until after the
   save below, so the saved v2 file still contains it. */
xtset team_id year;
merge 1:1 team_id year
    using "C:\Users\ejm5\Dropbox\Soccer\Data\elo_clubs.dta";

/* Rescale winning_per by a factor of 100, then write the v2 file. */
replace winning_per = 100 * winning_per;
save "C:\Users\ejm5\Dropbox\Soccer\data_for_do\data_analysis_v2.dta", replace;

/* Merge 3: coach_castrol. Its _merge variable is left in memory. */
drop _merge;
sort team_id year;
merge 1:1 team_id year
    using "C:\Users\ejm5\Dropbox\Soccer\Data\coach_castrol.dta";

/* NOTE(review): once team is dropped, later references to "team"
   can only resolve through variable-name abbreviation, presumably
   to team_id. Confirm against the dataset. */
drop team;


/* Table 1: summary statistics. The second command is restricted to
   observations where avg_goal_diff is not the system missing value. */
summarize
    avg_goal_diff ldnd ln_total ln_average quota standard_stock
    lfi cofi comlang_off iv_dist ldnd_high ldnd_low fst_dist;
summarize
    spi_o spi_d par_score mean_ws_rating rating elorating
    if avg_goal_diff != .;

/* Table 2a: pairwise correlations, starred at the 5 percent level. */
pwcorr
    avg_goal_diff ldnd ln_total ln_average quota standard_stock
    lfi comlang_off cofi iv_dist ldnd_high ldnd_low fst_dist,
    star(5);

/* Table 2b: pairwise correlations with the team-quality measures.
   NOTE(review): this list uses ln_avg whereas every other command in
   the file uses ln_average. ln_avg is not a valid abbreviation of
   ln_average, so it must be a separate variable. Confirm it exists
   and is the one intended. */
pwcorr
    ldnd ln_total ln_avg top100 spi_o spi_d par_score
    mean_ws_rating rating elorating,
    star(5);



#delimit ;
/* ------------------------------------------------------------------
   Table 3: regressions of avg_goal_diff on ldnd with varying
   controls. Each model is written to table3 with outreg2. The first
   call replaces the file, later calls append, and the last call also
   requests the excel output.
   ------------------------------------------------------------------ */
local fmt    "bdec(3) tdec(3) e(rmse)";
local lgfe   "i.year i.league_id";
local lgtxt  "addtext(Year FE, Yes, League FE, Yes)";

/* (1) ldnd only */
regress avg_goal_diff ldnd `lgfe', cluster(team);
outreg2 using table3, `fmt' `lgtxt' replace;

/* (2) adds ln_total */
regress avg_goal_diff ldnd ln_total `lgfe', cluster(team);
outreg2 using table3, `fmt' `lgtxt';

/* (3) adds ln_average */
regress avg_goal_diff ldnd ln_average `lgfe', cluster(team);
outreg2 using table3, `fmt' `lgtxt';

/* Auxiliary regression of ln_total on team and year indicators.
   Fitted values go to yhat and residuals to yres. Only yres is used
   by the next model. */
regress ln_total i.team i.year, cluster(league_id);
predict yhat;
predict yres, resid;

/* (4) residualised ln_total as the control */
regress avg_goal_diff ldnd yres `lgfe', cluster(team);
outreg2 using table3, `fmt' `lgtxt';

/* (5) team indicators instead of league indicators, clustered by league */
regress avg_goal_diff ldnd ln_average i.team_id i.year, cluster(league_id);
outreg2 using table3, `fmt' addtext(Year FE, Yes, Team FE, Yes);

/* (6) and (7) add elorating */
regress avg_goal_diff ldnd ln_total elorating `lgfe', cluster(team);
outreg2 using table3, `fmt' `lgtxt';

regress avg_goal_diff ldnd ln_average elorating `lgfe', cluster(team);
outreg2 using table3, `fmt' `lgtxt';

/* (8) and (9) add top100 */
regress avg_goal_diff ldnd ln_total top100 `lgfe', cluster(team);
outreg2 using table3, `fmt' `lgtxt';

regress avg_goal_diff ldnd ln_average top100 `lgfe', cluster(team);
outreg2 using table3, `fmt' `lgtxt' excel;



#delimit ;
/* ------------------------------------------------------------------
   Table 4: alternative outcomes (avg_points, winning_per) and
   alternative diversity measures (lfi, cofi, comlang_off, iv_dist).

   Every model below includes i.year and i.league_id and clusters on
   team. None includes team indicators. The outreg2 footer therefore
   reports League FE. The earlier "Team FE, Yes" text did not match
   the estimated specification.
   ------------------------------------------------------------------ */

/* Squared diversity term. It is not used in this table. */
generate ldnd_sq=ldnd^2;

set more off;

tsset team year, yearly;

/* Outcome: avg_points */
regress avg_points ldnd ln_average  i.year i.league_id, cluster(team);
outreg2 using table4, bdec(3) tdec(3) e(rmse) addtext(Year FE, Yes, League FE, Yes) replace;

regress avg_points ldnd ln_average elorating i.year i.league_id, cluster(team);
outreg2 using table4, bdec(3) tdec(3) e(rmse) addtext(Year FE, Yes, League FE, Yes);

/* Outcome: winning_per */
regress winning_per ldnd ln_average  i.year i.league_id, cluster(team);
outreg2 using table4, bdec(3) tdec(3) e(rmse) addtext(Year FE, Yes, League FE, Yes);

regress winning_per ldnd ln_average elorating  i.year i.league_id, cluster(team);
outreg2 using table4, bdec(3) tdec(3) e(rmse) addtext(Year FE, Yes, League FE, Yes);

/* Outcome: avg_goal_diff, with ldnd replaced by other measures */
regress avg_goal_diff lfi ln_average  i.year i.league_id, cluster(team);
outreg2 using table4, bdec(3) tdec(3) e(rmse)  addtext(Year FE, Yes, League FE, Yes);

regress avg_goal_diff cofi ln_average  i.year i.league_id, cluster(team);
outreg2 using table4, bdec(3) tdec(3) e(rmse)  addtext(Year FE, Yes, League FE, Yes);

regress avg_goal_diff comlang_off ln_average  i.year i.league_id, cluster(team);
outreg2 using table4, bdec(3) tdec(3) e(rmse)  addtext(Year FE, Yes, League FE, Yes);

/* Last model: also requests the excel output. */
regress avg_goal_diff iv_dist ln_average  i.year i.league_id, cluster(team);
outreg2 using table4, bdec(3) tdec(3) e(rmse) excel addtext(Year FE, Yes, League FE, Yes);





#delimit ;
/* ------------------------------------------------------------------
   Table 5: alternative specifications, written to alt_models.
   ldnd_sq must already exist in memory, because it is refreshed with
   replace rather than created here.
   ------------------------------------------------------------------ */
replace ldnd_sq = ldnd^2;

set more off;
tsset team year, yearly;

local fmt    "bdec(3) tdec(3) e(rmse)";
local lgtxt  "addtext(Year FE, Yes, League FE, Yes)";
local rhs    "ln_average i.year i.league_id";

/* (1) quadratic in ldnd */
regress avg_goal_diff ldnd ldnd_sq `rhs', cluster(team);
outreg2 using alt_models, `fmt' `lgtxt' replace;

/* (2) to (4) ldnd_high, ldnd_low and fst_dist in place of ldnd */
regress avg_goal_diff ldnd_high `rhs', cluster(team);
outreg2 using alt_models, `fmt' `lgtxt';

regress avg_goal_diff ldnd_low `rhs', cluster(team);
outreg2 using alt_models, `fmt' `lgtxt';

regress avg_goal_diff fst_dist `rhs', cluster(team);
outreg2 using alt_models, `fmt' `lgtxt';

/* (5) one-year lag of the outcome as the dependent variable.
   "league" is used here as an abbreviated variable name. */
regress L.avg_goal_diff ldnd ln_average i.year i.league, cluster(team);
outreg2 using alt_models, `fmt' excel `lgtxt';





#delimit ;
/* Close the log that was open for the preceding tables. */
log close;

/* ------------------------------------------------------------------
   Table 6, part 1: structural equation model with ldnd as the
   diversity measure, followed by two ivreg2 models. Output is logged
   to sem.smcl and tabulated in the outreg2 file "sem".
   ------------------------------------------------------------------ */
log using sem.smcl, replace;
sem (avg_goal_diff <-  ln_average ldnd)
    ( ln_average <- standard_stock ldnd )
    (ldnd<- ln_average quota) ,
    covariance( standard_stock  quota)  method(ml);
outreg2 using sem, bdec(3) tdec(3) e(rmse) replace;
estat gof, stats(all);
estat eqgof;
/* ln_average instrumented with standard_stock */
ivreg2 avg_goal_diff ldnd (ln_average = standard_stock), first;
outreg2 using sem, bdec(3) tdec(3) e(all);
/* ldnd instrumented with quota */
ivreg2 avg_goal_diff ln_average (ldnd = quota ), first;
outreg2 using sem, bdec(3) tdec(3) e(all) excel;
log close;

/* ------------------------------------------------------------------
   Table 6, part 2: the same sequence with cofi in place of ldnd.
   Output is logged to sem2.smcl and tabulated in "sem2".
   ------------------------------------------------------------------ */
log using sem2.smcl, replace;
sem (avg_goal_diff <-  ln_average cofi)
    ( ln_average <- standard_stock cofi )
    (cofi<- ln_average quota) ,
    covariance( standard_stock  quota)  method(ml);
outreg2 using sem2, bdec(3) tdec(3) e(rmse) replace;
estat gof, stats(all);
estat eqgof;
ivreg2 avg_goal_diff cofi (ln_average = standard_stock), first;
outreg2 using sem2, bdec(3) tdec(3) e(all);
ivreg2 avg_goal_diff ln_average (cofi = quota ), first;
/* Fix: this model belongs in sem2. It was previously appended to the
   ldnd table "sem". */
outreg2 using sem2, bdec(3) tdec(3) e(all) excel;
log close;

/* Additional SEM variants that add top100. They run with no log open
   and are not exported. Fix: the first command now ends with its own
   delimiter. Without it the two sem commands were read as one. */
sem (avg_goal_diff <-  ln_average cofi top100)
    ( ln_average <- standard_stock  top100)
    (cofi<- ln_average quota top100) ,
    covariance( standard_stock  quota top100)  method(ml);
sem (avg_goal_diff <-  ln_average ldnd top100)
    ( ln_average <- standard_stock ldnd top100 )
    (ldnd<- ln_average quota top100) ,
    covariance( standard_stock  quota top100)  method(ml);



#delimit ;
/* ------------------------------------------------------------------
   Bar charts of mean ldnd by team, by league and by year. The panels
   are saved as Figure2_a, Figure2_b and Figure2_c and then combined
   into Figure2.gph.
   ------------------------------------------------------------------ */

/* Fix: this command was missing its delimiter, so under the
   semicolon delimiter it ran into the text that followed it. */
summarize avg_goal_diff;

/* One copy of ldnd per league, so each league gets its own bar style. */
generate Bundesliga=ldnd if league_id==1;
generate SerieA=ldnd if league_id==2;
generate Premier=ldnd if league_id==3;
generate LaLiga=ldnd if league_id==5;
generate Ligue1=ldnd if league_id==4;

/* Panel a: by team.
   NOTE(review): the legend labels key 4 as Ligue 1 and key 5 as
   La Liga, but the fourth and fifth variables plotted are LaLiga and
   Ligue1. The legend is switched off by the final legend(off), so
   this is not visible. Correct it before enabling the legend. */
graph hbar (mean) Bundesliga SerieA Premier LaLiga Ligue1, nofill
    over(team_id, sort(ldnd) descending label(labcolor(black) labsize(small)))
    bargap(0) outergap(0) intensity(100) lintensity(0)
    legend(size(vsmall) rows(5) position(4) ring(0) label(1 Bundesliga) label(2 Serie A) label(3 Premier) label(4 Ligue 1) label(5 La Liga))
    bar(1, fcolor(gs4) lcolor(gs4) lwidth(thin) fintensity(inten100) )
    bar(2, fcolor(blue) lcolor(blue) lwidth(thin) fintensity(inten100) )
    bar(3, fcolor(maroon) lcolor(maroon) lwidth(thin) fintensity(inten100))
    bar(4, fcolor(ltblue) lcolor(ltblue) lwidth(thin) fintensity(inten100))
    bar(5, fcolor(gs14) lcolor(gs14) lwidth(thin) fintensity(inten100))
    blabel(bar, size(vsmall) color(black) position(outside) format(%9.3g))
    ylab(0(5)90, labsize(vsmall))
    ytitle("Average Linguistic Distance (2003-2012)", size(small)  margin(medium))
    title("By Team", size(medlarge) color(black)) legend(off);
graph save Figure2_a.gph, replace;

/* Panel b: by league. */
graph hbar (mean) Bundesliga SerieA Premier LaLiga Ligue1, nofill
    over(league_id, sort(ldnd) descending label(labcolor(black) labsize(medium)))
    bargap(0) outergap(0) intensity(100) lintensity(0)
    bar(1, fcolor(gs4) lcolor(gs4) lwidth(thin) fintensity(inten100) )
    bar(2, fcolor(blue) lcolor(blue) lwidth(thin) fintensity(inten100) )
    bar(3, fcolor(maroon) lcolor(maroon) lwidth(thin) fintensity(inten100))
    bar(4, fcolor(ltblue) lcolor(ltblue) lwidth(thin) fintensity(inten100))
    bar(5, fcolor(gs14) lcolor(gs14) lwidth(thin) fintensity(inten100))
    blabel(bar, size(medium) color(black) position(outside) format(%9.4g))
    ylab(0(5)90, labsize(small))
    ytitle("Average Linguistic Distance (2003-2012)", size(medium)  margin(medium))
    legend(off) title("By League", size(vlarge) color(black));
graph save Figure2_b.gph, replace;

/* Panel c: by year, all teams pooled. */
graph hbar (mean) ldnd, nofill
    over(year,  descending label(labcolor(black) labsize(medium)))
    bargap(0) outergap(0) intensity(100) lintensity(0)
    blabel(bar, size(medium) color(black) position(outside) format(%9.4g))
    ylab(0(5)90, labsize(small))
    ytitle("Average Linguistic Distance (All Teams)", size(medium)  margin(medium))
    legend(off) title("By Year", size(vlarge) color(black));
graph save Figure2_c.gph, replace;

/* Stack b over c, then place a to the left of that pair. */
graph combine Figure2_b.gph Figure2_c.gph, xcommon rows(2);
graph save Figure2bc.gph, replace;

graph combine Figure2_a.gph Figure2bc.gph, xcommon cols(2) imargin(tiny);
graph save Figure2.gph, replace;



#delimit ;
/* ------------------------------------------------------------------
   Coach analysis: builds coach-level variables and estimates five
   models, written to the outreg2 file "coach".
   ------------------------------------------------------------------ */
encode coach, gen(coach_id);

/* Blank coach names carry no tenure information. */
replace coach_years=. if coach=="";
sum coach_years;
hilo coach_year coach coach_id;

/* Elo rating in observations where the coach tenure variable is 1,
   then the maximum of that value within each coach. */
generate coach_1y_elo=elorating if coach_year==1;
by coach_id, sort: egen max_coach_1y_elo=max(coach_1y_elo);
gen inter_coach_diversity= max_coach_1y_elo*ldnd;

/* Median split on the coach measure.
   Fix: Stata treats missing values as larger than any number, so the
   earlier ">= r(p50)" test coded coaches with no measure as good.
   good_coach is now missing whenever max_coach_1y_elo is missing. */
sum max_coach_1y_elo, detail;
generate good_coach = (max_coach_1y_elo >= r(p50)) if !missing(max_coach_1y_elo);

/* (1) coach indicators and year indicators.
   Fix: the footer now reports Coach FE. This model has no league
   indicators, so the earlier "League FE, Yes" text was wrong. */
regress avg_goal_diff ldnd ln_average i.year i.coach_id, cluster(team);
outreg2 using coach, bdec(3) tdec(3) e(rmse) addtext(Year FE, Yes, Coach FE, Yes) replace;

/* (2) foreign_coach interacted with ldnd */
xi: regress avg_goal_diff i.foreign_coach*ldnd ln_average i.year i.league_id, cluster(team);
outreg2 using coach, bdec(3) tdec(3) e(rmse) addtext(Year FE, Yes, League FE, Yes);

/* (3) coach measure and its product with ldnd */
xi: regress avg_goal_diff ldnd max_coach_1y_elo inter_coach_diversity ln_average i.year i.league_id, cluster(team);
outreg2 using coach, bdec(3) tdec(3) e(rmse) addtext(Year FE, Yes, League FE, Yes);

/* (4) as (3), plus foreign_coach */
xi: regress avg_goal_diff foreign_coach ldnd max_coach_1y_elo inter_coach_diversity ln_average i.year i.league_id, cluster(team);
outreg2 using coach, bdec(3) tdec(3) e(rmse) addtext(Year FE, Yes, League FE, Yes);

/* (5) good_coach and foreign_coach, each interacted with ldnd.
   This call also requests the excel output. */
xi: regress avg_goal_diff i.good_coach*ldnd i.foreign_coach*ldnd ln_average i.year i.league_id, cluster(team);
outreg2 using coach, bdec(3) tdec(3) e(rmse) addtext(Year FE, Yes, League FE, Yes) excel;

#delimit ;
/* ------------------------------------------------------------------
   Presentation plots on league-year means. The data are collapsed
   inside preserve and restore, so the team-level data come back
   afterwards.

   Fix: the fitted line previously specified lpattern(dash) followed
   by lpattern(medium). "medium" is a line width, not a line pattern,
   and the repeated option displaced the dash setting. It is now
   lwidth(medium).

   NOTE(review): the legends label key 5 as Spain and key 6 as France,
   while keys 5 and 6 are the plots for league 4 and league 5.
   Elsewhere in this file league_id 4 is used for Ligue1 and 5 for
   LaLiga. Confirm the league_id coding and correct the labels if
   needed. They are left unchanged here.
   ------------------------------------------------------------------ */
preserve;
collapse ldnd quota ln_average standard_stock , by(league_id year);

/* Manual override of the collapsed quota for league 3 from 2011 on. */
replace quota=8 if league==3 & year>=2011;

/* Plot 1: league mean ldnd against quota, points labelled by year. */
twoway (lfit ldnd quota, lpattern(dash) lcolor(red) lwidth(medium))
(scatter ldnd quota if league==1,  mlabsize(tiny) mlabcolor(black)  mlabel(year) msymbol(diamond) mcolor(gs4) mlabposition(3))
(scatter ldnd quota if league==2, mlabsize(tiny)  mlabcolor(black)   mlabel(year) msymbol(square) mcolor(navy) mlabposition(3))
(scatter ldnd quota if league==3, mlabsize(tiny)  mlabcolor(black)   mlabel(year) msymbol(circle) mcolor(maroon) mlabposition(3))
(scatter ldnd quota if league==4, mlabsize(tiny)  mlabcolor(black)   mlabel(year) msymbol(triangle) mcolor(ltblue) mlabposition(3))
(scatter ldnd quota if league==5, mlabsize(tiny)   mlabcolor(black)  mlabel(year) msymbol(x) mcolor(gs10) mlabposition(3)),
legend(rows(3) size(vsmall) position(7) ring(0) label(1 Fit) label(2 Germany) label(3 Italy) label(4 UK) label(5 Spain) label(6 France))
ytitle("Average Linguistic Distance in League", size(medium) margin(medium)) xtitle("Home Grown Player Slots", size(medium) margin(medium));

/* Declare the league-year panel so the lag operator can be used. */
xtset league_id year;

/* Plot 2: league mean ln_average against lagged standard_stock. */
twoway (lfit ln_average l.standard_stock, lpattern(dash) lcolor(red) lwidth(medium))
(scatter ln_average l.standard_stock if league==1,  mlabsize(tiny) mlabcolor(black)  mlabel(year) msymbol(diamond) mcolor(gs4) mlabposition(3))
(scatter ln_average l.standard_stock if league==2, mlabsize(tiny)  mlabcolor(black)   mlabel(year) msymbol(square) mcolor(navy) mlabposition(3))
(scatter ln_average l.standard_stock if league==3, mlabsize(tiny)  mlabcolor(black)   mlabel(year) msymbol(circle) mcolor(maroon) mlabposition(3))
(scatter ln_average l.standard_stock if league==4, mlabsize(tiny)  mlabcolor(black)   mlabel(year) msymbol(triangle) mcolor(ltblue) mlabposition(3))
(scatter ln_average l.standard_stock if league==5, mlabsize(tiny)   mlabcolor(black)  mlabel(year) msymbol(x) mcolor(gs10) mlabposition(3)),
legend(rows(3) size(vsmall) position(5) ring(0) label(1 Fit) label(2 Germany) label(3 Italy) label(4 UK) label(5 Spain) label(6 France))
ytitle("Average Player Value (ln)", size(medium) margin(medium)) xtitle("Lag Standardized Stock Market (2003=100)", size(medium) margin(medium));
restore;



/* ------------------------------------------------------------------
   Figure 5: team-level scatter of mean goal differential against
   mean ldnd, drawn twice with different bubble weights and combined.
   ------------------------------------------------------------------ */

/* Earlier residual-based version, kept for reference but disabled.
preserve
set more off
regress avg_goal_diff ln_total  i.year i.league_id, cluster(team)
predict avg, resid
regress  ldnd ln_total  i.year i.league_id, cluster(team)
predict ldnd_resid, resid
collapse (mean) avg ldnd_resid league_id (count) n=ldnd, by(team_id )
scatter avg ldnd_resid [weight=n]
twoway (lfit avg ldnd_resid) (scatter avg ldnd_resid [weight=n])*/

#delimit ;
/* Reduce to one row per team. This replaces the data in memory and
   is not wrapped in preserve and restore. After it runs, the names
   avg, team and elo used below are variable-name abbreviations. */
collapse
    (mean) avg_goal_diff winning_per ldnd cofi total average elorating quota
    (count) n=avg_goal_diff,
    by(team_id league_id );

/* Panel a: bubbles weighted by total, with a fitted line, then one
   marker-free overlay per league that only prints the team label. */
twoway
    (scatter avg ldnd [w=total], legend(off) msymbol(Oh) mcolor(gs11)
        ylabel(-1.5 (.5) 1.5, labsize(small)) xlabel(0(10)100, labsize(small)))
    (lfit avg ldnd)
    (scatter avg ldnd if league_id==1, msymbol(none) mlabel(team) mlabsize(vsmall) mcolor(gs2) mlabcolor(navy) msize(vsmall))
    (scatter avg ldnd if league_id==2, msymbol(none) mlabel(team) mlabsize(vsmall) mcolor(gs2) mlabcolor(purple) msize(vsmall))
    (scatter avg ldnd if league_id==3, msymbol(none) mlabel(team) mlabsize(vsmall) mcolor(gs2) mlabcolor(maroon) msize(vsmall))
    (scatter avg ldnd if league_id==4, msymbol(none) mlabel(team) mlabsize(vsmall) mcolor(gs2) mlabcolor(green) msize(vsmall))
    (scatter avg ldnd if league_id==5, msymbol(none) mlabel(team) mlabsize(vsmall) mcolor(gs2) mlabcolor(blue) msize(vsmall)),
    ytitle("Average Goal Differential Per Season", size(medsmall) margin(medium))
    xtitle("Linguistic Diversity", size(medsmall) margin(medium))
    note("Bubble Size=Transfer Value in Millions of British Pounds; r=0.34*");
graph save Figure5a.gph, replace;

/* Panel b: same layout with bubbles weighted by elo. */
twoway
    (scatter avg ldnd [w=elo], legend(off) msymbol(Oh) mcolor(gs13)
        ylabel(-1.5 (.5) 1.5, labsize(small)) xlabel(0(10)100, labsize(small)))
    (lfit avg ldnd)
    (scatter avg ldnd if league_id==1, msymbol(none) mlabel(team) mlabsize(vsmall) mcolor(gs4) mlabcolor(gs4) msize(vsmall))
    (scatter avg ldnd if league_id==2, msymbol(none) mlabel(team) mlabsize(vsmall) mcolor(blue) mlabcolor(navy) msize(vsmall))
    (scatter avg ldnd if league_id==3, msymbol(none) mlabel(team) mlabsize(vsmall) mcolor(maroon) mlabcolor(maroon) msize(vsmall))
    (scatter avg ldnd if league_id==4, msymbol(none) mlabel(team) mlabsize(vsmall) mcolor(ltblue) mlabcolor(blue) msize(vsmall))
    (scatter avg ldnd if league_id==5, msymbol(none) mlabel(team) mlabsize(vsmall) mcolor(black) mlabcolor(black) msize(vsmall)),
    ytitle("Average Goal Differential Per Season", size(medsmall) margin(medium))
    xtitle("Linguistic Diversity", size(medsmall) margin(medium))
    note("Bubble Size=Elo Rating");
graph save Figure5b.gph, replace;

/* Side-by-side panels sharing the y axis. */
graph combine Figure5a.gph Figure5b.gph, ycommon imargin(tiny);
graph save Figure5.gph, replace;
